library(Seurat)Attaching SeuratObject
library(cowplot)
library(ggplot2)library(Seurat)Attaching SeuratObject
library(cowplot)
library(ggplot2)dataDirs <- list(
day60_old = list(
cellRanger = "/srv/gstore4users/p29781/o29804_CellRangerCount_2022-11-04--11-20-44/day60oticdiff_Library_418886_1",
seuratReport = "/srv/gstore4users/p29781/o29804_ScSeurat_2022-11-08--10-01-51/day60oticdiff_Library_418886_1_SCReport",
labeledObject = "/srv/gstore4users/p29781/additional-analyses/2022-11-28-relabel-day60"
),
day60_new = list(
cellRanger = "/srv/gstore4users/p29781/o30306_CellRangerCount_2022-12-20--11-08-02/IEOday60_controlprotocol",
scData = "/srv/gstore4users/p29781/o30306_ScSeurat_2022-12-20--22-03-42/IEOday60_controlprotocol_SCReport")
)
dataDirs$day60_old
$day60_old$cellRanger
[1] "/srv/gstore4users/p29781/o29804_CellRangerCount_2022-11-04--11-20-44/day60oticdiff_Library_418886_1"
$day60_old$seuratReport
[1] "/srv/gstore4users/p29781/o29804_ScSeurat_2022-11-08--10-01-51/day60oticdiff_Library_418886_1_SCReport"
$day60_old$labeledObject
[1] "/srv/gstore4users/p29781/additional-analyses/2022-11-28-relabel-day60"
$day60_new
$day60_new$cellRanger
[1] "/srv/gstore4users/p29781/o30306_CellRangerCount_2022-12-20--11-08-02/IEOday60_controlprotocol"
$day60_new$scData
[1] "/srv/gstore4users/p29781/o30306_ScSeurat_2022-12-20--22-03-42/IEOday60_controlprotocol_SCReport"
resultDir <- "/scratch/jcarreno/sta426_project/results"First we load the data from the server and we identify the source of each dataset
oldAnalysis <- readRDS(file = paste(dataDirs$day60_old$seuratReport, "/scData.rds",sep = ""))
newAnalysis <- readRDS(file = paste(dataDirs$day60_new$scData, "/scData.rds",sep = ""))oldAnalysis$experiment <- "OLD"
newAnalysis$experiment <- "NEW"Now, we can merge the datasets using FindIntegrationAnchors() and IntegrateData() from Seurat package.
anchors <- FindIntegrationAnchors(object.list = list(oldAnalysis, newAnalysis), dims = 1:20)Computing 2000 integration features
Scaling features for provided objects
Finding all pairwise anchors
Running CCA
Merging objects
Finding neighborhoods
Finding anchors
Found 8166 anchors
Filtering anchors
Retained 2798 anchors
combined <- IntegrateData(anchorset = anchors, dims = 1:20)Merging dataset 1 into 2
Extracting anchors for merged samples
Finding integration vectors
Finding integration vector weights
Integrating data
#combined <- readRDS(file = "combinedSeurat.rds")We can perform now basic visual inspection to check if the integration has been done properly
DefaultAssay(combined) <- "integrated"# Run the standard workflow for visualization and clustering
combined <- ScaleData(combined, verbose = FALSE)
combined <- RunPCA(combined, npcs = 30, verbose = FALSE)
ElbowPlot(combined)combined <- RunUMAP(combined, reduction = "pca", dims = 1:20)Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
This message will be shown once per session
20:14:02 UMAP embedding parameters a = 0.9922 b = 1.112
20:14:02 Read 12137 rows and found 20 numeric columns
20:14:02 Using Annoy for neighbor search, n_neighbors = 30
20:14:02 Building Annoy index with metric = cosine, n_trees = 50
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
20:14:04 Writing NN index file to temp file /tmp/RtmpNL02fK/file6e3a5c5ee0ed
20:14:04 Searching Annoy index using 1 thread, search_k = 3000
20:14:10 Annoy recall = 100%
20:14:11 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
20:14:12 Initializing from normalized Laplacian + noise (using irlba)
20:14:13 Commencing optimization for 200 epochs, with 494078 positive edges
20:14:20 Optimization finished
combined <- FindNeighbors(combined, reduction = "pca", dims = 1:20, k.param = 9)Computing nearest neighbor graph
Computing SNN
combined <- FindClusters(combined, resolution = 0.5)Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 12137
Number of edges: 138573
Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.9188
Number of communities: 21
Elapsed time: 0 seconds
p1 <- DimPlot(combined, reduction = "umap", group.by = "experiment")
p2 <- DimPlot(combined, reduction = "umap", label = TRUE)
plot_grid(p1, p2)DimPlot(combined, reduction = "umap", split.by = "experiment")Save the combined data
#saveRDS(combined, file = "combinedSeurat.rds")Note: MYO7A is a gene used by Steinhard et al. However, it does not appear in our analysis as it has likely been discarded due to its low presence during QC or during the merging of both datasets.
ghc <- c('ATOH1',
'MYO7A',
'MYO6',
'PCP4',
'ANXA4',
'GFI1',
'ESPN',
'TMPRSS3',
'BDNF',
'CCER2',
'GNG8',
'POU4F3',
'OTOF',
'FSIP1',
'ZBBX',
'SKOR1',
'DNAH5',
'SCL26A5',
'GATA3',
'LMOD3',
'FGF8',
'INSM1',
'DNM3'
)for (marker in ghc){
tryCatch({print(FeaturePlot(combined, reduction = "umap", features = marker, order= TRUE))},
error=function(e){print("ERROR")},
warning=function(w){print(paste(marker, "not present"))})
}[1] "MYO7A not present"
[1] "ANXA4 not present"
[1] "GFI1 not present"
[1] "ESPN not present"
[1] "TMPRSS3 not present"
[1] "BDNF not present"
[1] "FSIP1 not present"
[1] "SKOR1 not present"
[1] "SCL26A5 not present"
[1] "LMOD3 not present"
[1] "FGF8 not present"
Show subset of clusters (small clusters, those that are likely to be of interest)
DoHeatmap(combined, features = ghc, cells = WhichCells(combined, idents = c("8","9","10","11","12","13","14","15","16","17","18","19","20")))Warning in DoHeatmap(combined, features = ghc, cells = WhichCells(combined, :
The following features were omitted as they were not found in the scale.data
slot for the integrated assay: FGF8, LMOD3, SCL26A5, SKOR1, FSIP1, BDNF,
TMPRSS3, ESPN, GFI1, ANXA4, MYO7A
Number of cells per cluster:
cellCountperCluster <- data.frame(id = Idents(combined))
barplot = ggplot(data=cellCountperCluster, aes(x=id)) +
geom_bar() +
geom_text(stat='count', aes(label=..count..), vjust=-1) +
theme_minimal()
barplot + labs(x="Cluster", y = "Cell count")Warning: The dot-dot notation (`..count..`) was deprecated in ggplot2 3.4.0.
ℹ Please use `after_stat(count)` instead.
DotPlot(combined, features=ghc) + coord_flip() + theme(axis.text.x = element_text(size = 8)) Warning: Found the following features in more than one assay, excluding the
default. We will not include these in the final data frame: MYO7A, ANXA4, GFI1,
ESPN, TMPRSS3, BDNF, FSIP1, SKOR1, LMOD3, FGF8
Warning in FetchData.Seurat(object = object, vars = features, cells = cells):
The following requested variables were not found (10 out of 11 shown): MYO7A,
ANXA4, GFI1, ESPN, TMPRSS3, BDNF, FSIP1, SKOR1, SCL26A5, LMOD3
for (marker in ghc){
tryCatch({print(print(VlnPlot(combined, marker)))},
error=function(e){print("ERROR")},
warning=function(w){print(paste(marker, "not present"))})
}[1] "MYO7A not present"
[1] "ANXA4 not present"
[1] "GFI1 not present"
[1] "ESPN not present"
[1] "TMPRSS3 not present"
[1] "BDNF not present"
[1] "FSIP1 not present"
[1] "SKOR1 not present"
[1] "ERROR"
[1] "LMOD3 not present"
[1] "FGF8 not present"
cells.use <- WhichCells(combined, idents = '20')
DimPlot(combined, reduction = "umap", group.by = "ident", cells.highlight = cells.use, sizes.highlight = 0.3) + NoLegend()Cluster 20 is likely to be HC cluster
oep <- c(
'EPCAM',
'CDH1',
'SOX2',
'SIX1',
'OC90',
'SOX10',
'FBXO2',
'LMX1A',
'PAX2',
'PAX8',
'DLX5',
'GBX2',
'JAG1',
'TBX2',
'COL9A2',
'OTOA',
'MYO5C',
'OTOL1',
'USH1C',
'PCDH9')for (marker in oep){
tryCatch({print(FeaturePlot(combined, reduction = "umap", features = marker, order= TRUE))},
error=function(e){print("ERROR")},
warning=function(w){print(paste(marker, "not present"))})
}[1] "CDH1 not present"
[1] "SOX2 not present"
[1] "DLX5 not present"
[1] "GBX2 not present"
[1] "JAG1 not present"
[1] "MYO5C not present"
DoHeatmap(combined, features = ghc)Warning in DoHeatmap(combined, features = ghc): The following features were
omitted as they were not found in the scale.data slot for the integrated assay:
FGF8, LMOD3, SCL26A5, SKOR1, FSIP1, BDNF, TMPRSS3, ESPN, GFI1, ANXA4, MYO7A
DotPlot(combined, features=oep) + coord_flip() + theme(axis.text.x = element_text(size = 8)) Warning: Found the following features in more than one assay, excluding the
default. We will not include these in the final data frame: CDH1, SOX2, DLX5,
GBX2, JAG1, MYO5C
Warning in FetchData.Seurat(object = object, vars = features, cells = cells):
The following requested variables were not found: CDH1, SOX2, DLX5, GBX2, JAG1,
MYO5C
for (marker in oep){
tryCatch({print(VlnPlot(combined, marker))},
error=function(e){print("ERROR")},
warning=function(w){print(paste(marker, "not present"))})
}[1] "CDH1 not present"
[1] "SOX2 not present"
[1] "DLX5 not present"
[1] "GBX2 not present"
[1] "JAG1 not present"
[1] "MYO5C not present"
Cluster 13 is likely to be OEP cells
hc_stei <- c(
"ATOH1",
"MYO7A",
"OTOF",
"STRC",
"ESPN",
"GPX2",
"PCDH15",
"CDH23",
"USH2A",
"POU4F3",
"CABP2",
"GFI1",
"USH1C",
"RIPOR2",
"MYO6",
"MYO15A",
"CIB2",
"PCP4",
"CALM2",
"LHFPL5"
)oep_stei <- c(
"PAX8",
"PAX2",
"OC90",
"HMX3",
"DLX3",
"DLX5",
"SALL4",
"DUSP6",
"SPRY2",
"SIX1",
"EYA1",
"FOXG1",
"LMX1A",
"OTOA",
"APOE",
"SMOC2",
"SPARCL1",
"FBXO2",
"COL11A1",
"COL9A2"
)VlnPlot(combined, hc_stei, idents = c("20", "13"))Warning: Found the following features in more than one assay, excluding the
default. We will not include these in the final data frame: MYO7A, ESPN, CDH23,
CABP2, GFI1, LHFPL5
Warning in FetchData.Seurat(object = object, vars = features, slot = slot): The
following requested variables were not found: MYO7A, ESPN, CDH23, CABP2, GFI1,
LHFPL5
VlnPlot(combined, oep_stei, idents = c("20", "13"))Warning: Found the following features in more than one assay, excluding the
default. We will not include these in the final data frame: HMX3, DLX3, DLX5,
SALL4, DUSP6, SPRY2, FOXG1
Warning in FetchData.Seurat(object = object, vars = features, slot = slot): The
following requested variables were not found: HMX3, DLX3, DLX5, SALL4, DUSP6,
SPRY2, FOXG1